
## R script to make figures for marriage paper from
## outreg2 text files generated by Stata

## Load required packages:
## readstata13 is a package for reading Stata files
library(readstata13)
## ggplot2 is a popular plotting package by Hadley Wickham
library(ggplot2)

## Write a function to get the figure points from
## the file Stata wrote
get.fig.points <- function(filename, x, group) {
  ## This function takes as arguments
  ## the file name as a character string,
  ## a vector x of the x-values of the figure points,
  ## and the group name as a character string
  results <- read.dta13(paste(
    "/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/",
    filename,".dta",sep=""
  ))
  ## Look at just the rows with the coefficient (1),
  ## CI min (5), and CI max (6),
  ## and the columns with graph points
  ## (grepl returns a logical for whether the column name
  ## contains the string "graph")
  results <- results[c(1,5,6),grepl("graph",colnames(results))]
  ## Transpose so that the columns are the coef, CI min, and CI max
  results <- t(results)
  ## Add a first row so we start at 0
  results <- rbind(c(0,0,0),
                   results)
  ## Exponentiate, subtract one, and multiply by 100 for the figures
  results <- 100*(exp(results)-1)
  ## Rename the columns to clarify what they are
  colnames(results) <- c("pct.change","min","max")
  ## Turn it into a data frame, including the x-values for the plot
  results <- data.frame(x = x, results, group = group)
  return(results)
}
## For example, below I call the function
get.fig.points(filename = "t_a1_c2",
               x = c(-5,-1,1,5,10),
               group = "Married")
## This is equivalent to giving the same commands in order
get.fig.points("t_a1_c2",c(-5,-1,1,5,10),"Married")


## For the matching results with no controls, it's a little different because
## we want exponentiated mean log wages, rather than percent change
## Write a function to get the figure points from
## the file Stata wrote
get.match.fig.points <- function(filename, x, group) {
  ## This function takes as arguments
  ## the file name as a character string,
  ## a vector x of the x-values of the figure points,
  ## and the group name as a character string
  results <- read.dta13(paste(
    "/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/",
    filename,".dta",sep=""
  ))
  ## Transpose so that the columns are the coef, CI min, and CI max
  results <- t(results)
  ## Exponentiate to get exponentiated predicted log wages
  results <- exp(results)
  ## Rename the columns to clarify what they are
  colnames(results) <- c("pred.wages","min","max")
  ## Turn it into a data frame, including the x-values for the plot
  results <- data.frame(x = x, results, group = group)
  return(results)
}


## For the plot labels: Write two small functions
## to add "%" and "$" to the labels y-axis labels
add.pct <- function(x) paste(x,"%",sep="")
make.money <- function(x) paste("$",format(x,nsmall=2),sep="")

## Function to make the plots
make.plot <- function(results,title,n=NULL,fig.num,
                      min.year=NULL,max.year=NULL,
                      event, x.breaks = NULL) {
  ## The ifelse(logical,do1,do2) statement
  ## says to return do1 if logical=T,
  ## otherwise return do2.
  ## If min.year and max.year are manually given, use them
  ## If not, we'll use the min and max of x
  min.year <- ifelse(is.null(min.year),min(results$x),min.year)
  max.year <- ifelse(is.null(max.year),max(results$x),max.year)
  ## Make the figure
  ## The first argument is the data frame with the plot values
  ## The second argument is the aesthetics of the plot,
  ## which tells it what to look for in that data frame
  ggplot(data=results, aes(x=x, y=pct.change, group=group,
                           color=group, linetype=group)) +
    ## ggplot links commands with +. Here, we tell it
    ## to plot a line plot
    geom_line() +
    ## theme_bw is just an aesthetic option
    theme_bw() +
    ## geom_ribbon adds the CIs. alpha controls the 
    ## transparency of the ribbon
    geom_ribbon(aes(ymin = min, ymax = max, group=group,
                    fill=group),
                alpha=.1, color=NA) +
    ## Set the line types
    scale_linetype_manual(values = c(1,3,5,4)) +
    ## Set the color of the lines
    scale_color_manual(values = c("black","red","blue","green")) +
    ## Set the color of the confidence bands
    scale_fill_manual(values = c("black","red","blue","green")) +
    ## Add a vertical line in the year of marriage
    geom_vline(xintercept=0,lty=2) +
    ## Add x and y axis titles
    xlab(paste("\nYears since",event)) +
    ylab("% change in wages\n") +
    ## Add a plot title.
    ggtitle(ifelse(!is.null(n),
            paste(title,"\n(N = ",prettyNum(n,","),")",sep=""),
            title)) +
    ## Aesthetic options
    theme(text=element_text(size=10,family="Times New Roman")) +
    scale_y_continuous(label=add.pct) +
    scale_x_continuous(breaks = if (is.null(x.breaks)) waiver() else x.breaks) +
    theme(legend.position=ifelse(length(unique(results$group))==1,
                                 "none","bottom"),
          legend.title=element_blank(),
          legend.direction="vertical",
          legend.key.width=unit(1,"cm"),
          plot.title = element_text(hjust = 0.5)) +
    ## Save the plot. We may up the dpi later if necessary
    ggsave(paste("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/fig",
                 fig.num,".png",sep=""),
           width=3,
           height=ifelse(length(unique(results$group))==1,
                         3,4),
           dpi=200)
}
## Function to make the matching plots
make.match.plot <- function(results,title,n=NULL,fig.num,
                      min.year=NULL,max.year=NULL,
                      min.mar.year, max.mar.year) {
  ## The first argument is the data frame with the plot values
  ## The second argument is the aesthetics of the plot,
  ## which tells it what to look for in that data frame
  ggplot(data=results, aes(x=x, y=pred.wages, group=group,
                           color=group, linetype=group)) +
    ## ggplot links commands with +. Here, we tell it
    ## to plot a line plot
    geom_line() +
    ## theme_bw is just an aesthetic option
    theme_bw() +
    ## geom_ribbon adds the CIs. alpha controls the 
    ## transparency of the ribbon
    geom_ribbon(aes(ymin = min, ymax = max, group=group,
                    fill=group),
                alpha=.1, color=NA) +
    ## Add a shaded section for when marriages occur
    geom_rect(aes(xmin=min.mar.year,xmax=max.mar.year,ymin=-Inf,ymax=Inf,
                  show.legend=F),color=NA,alpha=.01) +
    annotate("text",
             x=(min.mar.year+max.mar.year)/2,
             y=max(results$max)-2,
             label="Marriages\noccur",size=2) +
    ## Set the line types
    scale_linetype_manual(values = c(1,3,5,4)) +
    ## Set the color of the lines
    scale_color_manual(values = c("black","red","blue","green")) +
    ## Set the color of the confidence bands
    scale_fill_manual(values = c("black","red","blue","green")) +
    ## Add x and y axis titles
    xlab("\nYear") +
    ylab("Predicted wages\n") +
    ## Add a plot title.
    ggtitle(ifelse(!is.null(n),
                   paste(title,"\n(N = ",prettyNum(n,","),")",sep=""),
                   title)) +
    ## Aesthetic options
    theme(text=element_text(size=10,family="Times New Roman"),
          plot.title = element_text(hjust = 0.5)) +
    scale_y_continuous(label=make.money) +
    theme(legend.position=ifelse(length(unique(results$group))==1,
                                 "none","bottom"),
          legend.title=element_blank(),
          legend.direction="vertical",
          legend.key.width=unit(1,"cm")) +
    ## Save the plot. We may up the dpi later if necessary
    ggsave(paste("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/fig",fig.num,".png",sep=""),
           width=3,
           height=ifelse(length(unique(results$group))==1,
                         3,4),
           dpi=200)
}

## For example, here I apply the two main functions
make.plot(results = get.fig.points(filename = "t_a1_c1",
                                   x = c(-5,-1,1,5,10),
                                   group = "Married"),
          title = "Marriage (linear spline)",
          n = 4218,
          fig.num = "1c",
          event = "marriage")

#########################
## PRODUCE THE FIGURES ##
#########################

## Most figures produce easily
## Code is a little more convoluted for the distributed FE
## and draws from the outreg2 text file output

## The main potential source of error is in each figure, it should reference
## "t_a#_c#", and the #s should refer to the correct appendix table and
## column number. I should also double-check the Stata file that
## the files of the form "t_a#_c#" all have the correct names.
## We will also have to manually enter the N values, which should come
## from the log file where it says "standard errors adjusted for X clusters
## in id."

## 1A
## The code here is the style of all the figures. It reads the results
## text file with predicted graph points produced by Stata.
## We need to manually enter the number of respondents in the model (n)
make.plot(results = get.fig.points(filename = "t_a1_c1",
                                   x = c(-5,-1,1,5,10),
                                   group = "Married"),
          title = "Marriage (linear spline)",
          n = 4218,
          fig.num = "1a",
          event = "marriage")

## 1B
make.plot(results = get.fig.points(filename = "t_a2_c1",
                                   x = c(-3,-1,1,5),
                                   group = "Divorced"),
          title = "Divorce (linear spline)",
          n = 1755,
          fig.num = "1b",
          event = "divorce")

## 2A
make.plot(
  results = rbind(
    get.fig.points("t_a3_c1", c(-3,-1,1,5,10),
                   group = "Marries at age 22 or under (N = 1,545)"),
    get.fig.points("t_a3_c2", c(-3,-1,1,5,10),
                   group = "Marries at age 23-26 (N = 1,301)"),
    get.fig.points("t_a3_c3", c(-3,-1,1,5,10),
                   group = "Marries at age 27+ (N = 1,372)")
  ),
  title = "By age at marriage",
  fig.num = "2a",
  event = "marriage")

## 2B
make.plot(
  results = rbind(
    get.fig.points("t_a3_c4", c(-5,-1,1,5,10),
                   group = "Non-shotgun marriage (N = 2,761)"),
    get.fig.points("t_a3_c5", c(-5,-1,1,5,10),
                   group = "Shotgun marriage (N = 556)")
  ),
  title = "By shotgun marriage",
  fig.num = "2b",
  event = "marriage")

## A1A and A1B are the same as 1A and 1B
## A1C
## The following lines are unusual, since they make figures
## for dist. FE rather than splines
## Read the dist FE results from the outreg2 text file
dist.fe.results <- read.table(paste("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/",
                                    "s_table_a1.txt",sep=""),
                              skip = 184, fill=T)
## Extract the coefficients, which are in the 2nd column in every 3rd line
## in the section where the results are
## The read in as a factor variable. Convert to a character (as.character),
## then use gsub to look for the * and replace them with nothing,
## then convert from character to numeric values.
## I manually verified this gives the correct values from the table.
coef <- c(0,as.numeric(gsub("[*]","",as.character(dist.fe.results[1+3*(0:14),2]))))
## Similarly for the SEs, except that we need to get rid of the "(" and ")"
se <- c(0,as.numeric(gsub("[(]|[)]","",as.character(dist.fe.results[2+3*(0:14),1]))))
make.plot(results = data.frame(pct.change = 100*(exp(coef)-1),
                               min = 100*(exp(coef-qnorm(.975)*se)-1),
                               max = 100*(exp(coef+qnorm(.975)*se)-1),
                               x = -5:10,
                               group = "Married"),
          title = "Marriage (distributed fixed effects)",
          n = 4218,
          fig.num = "a1c",
          event = "marriage")

## A1D
## The logic here is the same as 1a for reading the outreg2 text file
dist.fe.results <- read.table(paste("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/",
                                    "s_table_a2.txt",sep=""),
                              skip = 181, fill=T)
coef <- c(0,as.numeric(gsub("[*]","",as.character(dist.fe.results[1+3*(0:7),2]))))
se <- c(0,as.numeric(gsub("[(]|[)]","",as.character(dist.fe.results[2+3*(0:7),1]))))
make.plot(results = data.frame(pct.change = 100*(exp(coef)-1),
                               min = 100*(exp(coef-qnorm(.975)*se)-1),
                               max = 100*(exp(coef+qnorm(.975)*se)-1),
                               x = -3:5,
                               group = "Divorced"),
          title = "Divorce (distributed fixed effects)",
          n = 1755,
          fig.num = "a1d",
          event = "divorce")

## A2A
make.plot(results = get.fig.points(filename = "t_a1_c2",
                                   x = c(-5,-1,1,5,10),
                                   group = "Married"),
          title = "Marriage, imputed with prior wage",
          n = 4254,
          fig.num = "a2a",
          event = "marriage")

## A2B
make.plot(results = get.fig.points(filename = "t_a2_c2",
                                   x = c(-3,-1,1,5),
                                   group = "Divorced"),
          title = "Divorce, imputed with prior wage",
          n = 1778,
          fig.num = "a2b",
          event = "divorce")

## A2C
make.plot(results = get.fig.points(filename = "t_a1_c3",
                                   x = c(-5,-1,1,5,10),
                                   group = "Married"),
          title = "Marriage, imputed with\npredicted wage",
          n = 4312,
          fig.num = "a2c",
          event = "marriage")

## A2D
make.plot(results = get.fig.points(filename = "t_a2_c3",
                                   x = c(-3,-1,1,5),
                                   group = "Divorced"),
          title = "Divorce, imputed with\npredicted wage",
          n = 1800,
          fig.num = "a2d",
          event = "divorce")

## A3A
make.plot(results = get.fig.points(filename = "t_a6_c1",
                                   x = c(-5,-1,1,5,10,20),
                                   group = "Married"),
          title = "Marriage",
          n = 4218,
          fig.num = "a3a",
          event = "marriage")

## A3B
ggplot(data = data.frame(
  x=-5:20,
  y=read.table("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/num_mar_long.txt",
               head = T)$num1
), aes(x=x, y=y)) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept=0,lty=2) +
  xlab("\nYears since marriage") +
  ylab("Sample size\n") +
  ggtitle("Sample size by years\nsince marriage") +
  theme(text=element_text(size=10,family="Times New Roman")) +
  scale_y_continuous(limits=c(0,3000),
                     breaks=seq(0,3000,500),
                     labels=function(x) prettyNum(x,",")) +
  ggsave("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/figa3b.png",
         width=3,height=3)

## A3C
make.plot(results = get.fig.points(filename = "t_a6_c2",
                                   x = c(-3,-1,1,5,10,20),
                                   group = "Divorced"),
          title = "Divorce",
          n = 1756,
          fig.num = "a3c",
          event = "divorce")

## A3D
ggplot(data = data.frame(
  x=-5:20,
  y=read.table("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/num_div_long.txt",
               head = T)$num1
), aes(x=x, y=y)) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept=0,lty=2) +
  xlab("\nYears since divorce") +
  ylab("Sample size\n") +
  ggtitle("Sample size by years\nsince divorce") +
  theme(text=element_text(size=10,family="Times New Roman")) +
  scale_y_continuous(limits=c(0,1200),
                     breaks=seq(0,1200,200),
                     labels=function(x) prettyNum(x,",")) +
  ggsave("/Users/ilundberg/Dropbox/KillewaldLundberg_Demography/Data and code/figa3d.png",
         width=3,height=3)

## A4A
make.plot(
  results = rbind(
    get.fig.points("t_a7_c1", c(-5,-1,1,5,10),
                   group = "Egalitarian (N = 1,966)"),
    get.fig.points("t_a7_c2", c(-5,-1,1,5,10),
                   group = "Traditional (N = 2,252)")
  ),
  title = "By gender role attitudes",
  fig.num = "a4a",
  event = "marriage")

## A4B
make.plot(
  results = rbind(
    get.fig.points("t_a7_c3", c(-5,-1,1,5,10),
                   group = "Wife not employed (N = 871)"),
    get.fig.points("t_a7_c4", c(-5,-1,1,5,10),
                   group = "Wife employed part-time (N = 943)"),
    get.fig.points("t_a7_c5", c(-5,-1,1,5,10),
                   group = "Wife employed full-time (N = 1,810)")
  ),
  title = "By wife's employment",
  fig.num = "a4b",
  event = "marriage")

## A5A
make.match.plot(
  results = rbind(
    get.match.fig.points("t_a8_c1", c(1979,1982,1984,1988,1994),
                   group = "Not left at the altar (N = 60)"),
    get.match.fig.points("t_a8_c2", c(1979,1982,1984,1988,1994),
                   group = "Left at the altar (N = 60)")
  ),
  min.mar.year = 1982,
  max.mar.year = 1984,
  title = "Among those expecting marriage,\ndoes following through matter?",
  fig.num = "a5a")

## A5B
make.match.plot(
  results = rbind(
    get.match.fig.points("t_a8_c3", c(1979,1982,1984,1988,1994),
                   group = "Does not expect marriage (N = 130)"),
    get.match.fig.points("t_a8_c4", c(1979,1982,1984,1988,1994),
                   group = "Expects marriage (N = 130)")
  ),
  min.mar.year = 1982,
  max.mar.year = 1984,
  title = "Among those who marry in 1982-\n1984, does expecting it matter?",
  fig.num = "a5b")

## A6A
make.match.plot(
  results = rbind(
    get.match.fig.points("t_a8_c5", c(1979,1984,1988,1994),
                   group = "Not left at the altar (N = 192)"),
    get.match.fig.points("t_a8_c6", c(1979,1984,1988,1994),
                   group = "Left at the altar (N = 192)")
  ),
  min.mar.year = 1979,
  max.mar.year = 1984,
  title = "Among those expecting marriage,\ndoes following through matter?\n(5-year expectations)",
  fig.num = "a6a")

## A6B
make.match.plot(
  results = rbind(
    get.match.fig.points("t_a8_c7", c(1979,1984,1988,1994),
                   group = "Does not expect marriage (N = 94)"),
    get.match.fig.points("t_a8_c8", c(1979,1984,1988,1994),
                   group = "Expects marriage (N = 94)")
  ),
  min.mar.year = 1979,
  max.mar.year = 1984,
  title = "Among those who marry in 1979-\n1984, does expecting it matter?\n(5-year expectations)",
  fig.num = "a6b")

## A7A
make.plot(results = get.fig.points(filename = "t_a9_c1",
                                   x = c(-3,-1,1,5),
                                   group = "Remarried"),
          title = "2nd marriage",
          n = 1037,
          fig.num = "a7a",
          event = "2nd marriage")

## A7B
make.plot(results = get.fig.points(filename = "t_a9_c2",
                                   x = c(-5,-1,1,5,10),
                                   group = "Coresided"),
          title = "Coresidence",
          n = 4736,
          fig.num = "a7b",
          event = "coresidence")

## A8A
make.plot(results = get.fig.points(filename = "t_a1_c4",
                                   x = c(-5,-1,1,5,10),
                                   group = "Married"),
          title = "Marriage",
          n = 4510,
          fig.num = "a8a",
          event = "marriage")

## A8B
make.plot(results = get.fig.points(filename = "t_a2_c4",
                                   x = c(-3,-1,1,5),
                                   group = "Divorced"),
          title = "Divorce",
          n = 2023,
          fig.num = "a8b",
          event = "divorce")

## A9A
make.plot(
  results = rbind(
    get.fig.points("t_a10_c1", c(-5,-1,1,5,10),
                   group = "Less than high school (N = 781)"),
    get.fig.points("t_a10_c2", c(-5,-1,1,5,10),
                   group = "High school (N = 1,929)"),
    get.fig.points("t_a10_c3", c(-5,-1,1,5,10),
                   group = "Some college (N = 795)"),
    get.fig.points("t_a10_c4", c(-5,-1,1,5,10),
                   group = "College (N = 704)")
  ),
  title = "By education",
  fig.num = "a9a",
  event = "marriage")

## A9B
make.plot(
  results = rbind(
    get.fig.points("t_a11_c1", c(-5,-1,1,5,10),
                   group = "Lower tercile (0-32; N = 1,666)"),
    get.fig.points("t_a11_c2", c(-5,-1,1,5,10),
                   group = "Middle tercile (32-68; N = 1,278)"),
    get.fig.points("t_a11_c3", c(-5,-1,1,5,10),
                   group = "Upper tercile (68-100; N = 1,028)")
  ),
  title = "By AFQT tercile",
  fig.num = "a9b",
  event = "marriage")

## A9C
make.plot(
  results = rbind(
    get.fig.points("t_a12_c1", c(-3,-1,1,5,10),
                   group = "Hispanic (N = 720)"),
    get.fig.points("t_a12_c2", c(-3,-1,1,5,10),
                   group = "Black (N = 931)"),
    get.fig.points("t_a12_c3", c(-3,-1,1,5,10),
                   group = "White/other (N = 2,567)")
  ),
  title = "By race",
  fig.num = "a9c",
  event = "marriage")

## A10A
make.plot(
  results = rbind(
    get.fig.points("t_a5_c1", c(-5,-1,1,5,10),
                   group = "1st quartile of P(marriage) (N = 612)"),
    get.fig.points("t_a5_c2", c(-5,-1,1,5,10),
                   group = "Middle 50% of P(marriage) (N = 1,569)"),
    get.fig.points("t_a5_c3", c(-5,-1,1,5,10),
                   group = "4th quartile of P(marriage) (N = 868)")
  ),
  title = "By probability of marriage",
  fig.num = "a10a",
  event = "marriage")

## A10B
make.plot(
  results = rbind(
    get.fig.points("t_a14_c1", c(-5,-1,1,5,10),
                   group = "Ever married (N = 645)"),
    get.fig.points("t_a14_c2", c(-5,-1,1,5,10),
                   group = "Never married (N = 645)")
  ),
  title = "Matched comparison to\nnever-married men",
  fig.num = "a10b",
  event = "marriage")


## A11A
make.plot(results = get.fig.points(filename = "t_a15_c1",
                                   x = c(-5,-1,1,5,10),
                                   group = "Married"),
          title = "Marriage",
          n = 4218,
          fig.num = "a11a",
          event = "marriage")

## A11B
make.plot(results = get.fig.points(filename = "t_a15_c2",
                                   x = c(-3,-1,1,5),
                                   group = "Divorced"),
          title = "Divorce",
          n = 1755,
          fig.num = "a11b",
          event = "divorce")

## A12A
make.plot(
  results = rbind(
    get.fig.points("t_a15_c3", c(-3,-1,1,5,10),
                   group = "Marries at age 22 or under (N = 1,545)"),
    get.fig.points("t_a15_c4", c(-3,-1,1,5,10),
                   group = "Marries at age 23-26 (N = 1,301)"),
    get.fig.points("t_a15_c5", c(-3,-1,1,5,10),
                   group = "Marries at age 27+ (N = 1,372)")
  ),
  title = "By age at marriage",
  fig.num = "a12a",
  event = "marriage")

## A12B
make.plot(
  results = rbind(
    get.fig.points("t_a15_c6", c(-5,-1,1,5,10),
                   group = "Non-shotgun marriage (N = 2,761)"),
    get.fig.points("t_a15_c7", c(-5,-1,1,5,10),
                   group = "Shotgun marriage (N = 556)")
  ),
  title = "By shotgun marriage",
  fig.num = "a12b",
  event = "marriage")


## A13A
make.plot(results = get.fig.points(filename = "t_a16_c1",
                                   x = c(-5,-1,1,5),
                                   group = "Married"),
          title = "Marriage",
          n = 1614,
          fig.num = "a13a",
          event = "marriage",
          x.breaks = c(-5,0,5))

## A13B
make.plot(results = get.fig.points(filename = "t_a16_c2",
                                   x = c(-3,-1,1,5),
                                   group = "Divorced"),
          title = "Divorce",
          n = 314,
          fig.num = "a13b",
          event = "divorce")

## A14
make.plot(
  results = rbind(
    get.fig.points("t_a17_c1", c(-5,-1,1,5),
                   group = "Wife not employed (N = 404)"),
    get.fig.points("t_a17_c2", c(-5,-1,1,5),
                   group = "Wife employed part-time (N = 209)"),
    get.fig.points("t_a17_c3", c(-5,-1,1,5),
                   group = "Wife employed full-time (N = 829)")
  ),
  title = "By wife's employment, NLSY97",
  fig.num = "a14",
  event = "marriage")
